### Assignment of institutions to states
### 1 March 2023



# Preparation -------------------------------------------------------------

### load packages
library(foreign)
library(ggplot2)
library(tidyr)
library(dplyr)
library(maptools)
library(gridExtra)
library(ggmap)
library(data.table)
library(raster)
library(rgdal)
library(sp)
library(grid)
library(gridExtra)
library(Matching)
library(stargazer)
library(lmtest)
library(sandwich)
library(xtable)
library(geosphere)
library(readxl)
library(rgeos)

### Empty environment
# NOTE: removes every object from the global workspace (attached packages
# are not affected).
rm(list = ls())

### Set working directory
# `path` is the project root: the data files read below live in
# "<path>/Raw data" and the result is written to "<path>/State file".
path <- "" # define path here

# Fail early with an actionable message; with the empty placeholder setwd()
# would only report "cannot change working directory".
if (!nzchar(path) || !dir.exists(path)) {
  stop("`path` must point to an existing project directory; ",
       "define it at the top of this script.", call. = FALSE)
}
setwd(path)





# Load relevant data ------------------------------------------------------

### Parliaments
parliaments <- read.dta(paste0(path, "/Raw data/parliamentsVBB.dta"))

### Cities (one data set per source file)
cities.bairoch <- read.dta(paste0(path, "/Raw data/citiesBairoch.dta"))
cities.bosker <- read.dta(paste0(path, "/Raw data/citiesBosker.dta"))

### Universities
# keep only rows with a known founding value of at most 1850 and expose that
# value under the name `year` as well
universities <- read.dta(paste0(path, "/Raw data/universities.dta"))
universities <- universities[which(universities$founding <= 1850), ]
universities[["year"]] <- universities[["founding"]]

### HRE Cantoni
CantoniHRE <- read.dta(paste0(path, "/Raw data/HRECantoni.dta"))





# Write a function to assign institutions to states -----------------------

### function
# Spelling corrections for state names (code from Abramson 2017), written as
# "name found in the shapefile" = "harmonised name". No harmonised name is
# itself a key, so a single lookup pass gives the same result as applying
# the corrections one after another.
state_name_fixes <- c(
  "Balearic islands" = "Balearic Islands",
  "Albanians" = "Albania",
  "Almavorid_4" = "Almoravid_4",
  "Almafi" = "Amalfi",
  "Bresancon" = "Besancon",
  "Stratsbourg" = "Strasbourg",
  "Russians" = "Russians_1",
  "Correggio" = "Corregio",
  "Wurrtemberg" = "Wurttemberg",
  "Ertfurt" = "Erfurt",
  "Regensberg" = "Regensburg",
  "Ullster" = "Ulster",
  "The  Holy Roman Empire" = "The Holy Roman Empire",
  "Fatimad_1" = "Fatimid_1",
  "Fatimad_2" = "Fatimid_2",
  "Fatimad_3" = "Fatimid_3",
  "Fatimad_4" = "Fatimid_4",
  "Fatimad_5" = "Fatimid_5",
  "Fatimad_6" = "Fatimid_6",
  "Kevian Rus" = "Kievan Rus",
  "Lithuania" = "Lithuanians",
  "Polaban Slavs" = "Polabian Slavs",
  "Poblabian Slavs" = "Polabian Slavs",
  "Pechengens" = "Pechenegs",
  "Kingdom of Burgandy" = "Kingdom of Burgundy",
  "Leistner" = "Leinster",
  "Noresmen" = "Norsemen",
  "South_Slavs" = "South Slavs",
  "Strachclyde" = "Strathclyde",
  "Umayaa_1_4_1" = "Ummayad_1_4_1",
  "Umayaad_1_1" = "Ummayad_1_1",
  "Umayaad_1_2" = "Ummayad_1_2",
  "Umayaad_1_3" = "Ummayad_1_3",
  "Umayaad_1_4" = "Ummayad_1_4",
  "Umayaad_1_4_1" = "Ummayad_1_4_1",
  "Umayaad_1_4_2" = "Ummayad_1_4_2",
  "Umayaad_2" = "Ummayad_2",
  "Umayaad_3" = "Ummayad_3",
  "Umayaad_3_1" = "Ummayad_3_1",
  "Umayaad_3_2" = "Ummayad_3_2",
  "Umayaad_4" = "Ummayad_4",
  "Umayaad_4_1" = "Ummayad_4_1",
  "Umayaad_4_1_1" = "Ummayad_4_1_1",
  "Ummayaad_4_1_1" = "Ummayad_4_1_1",
  "Umayaad_4_1_2" = "Ummayad_4_1_2",
  "Umayaad_4_2" = "Ummayad_4_2",
  "Umayaad_5" = "Ummayad_5",
  "Umayaard_3_1" = "Ummayad_3_1",
  "Umayyad_3_2" = "Ummayad_3_2",
  "Geona" = "Genoa",
  "Almovorads" = "Almoravids",
  "Balearic" = "Balearic Islands",
  "Hammadid" = "Hammadids",
  "Norseman" = "Norsemen",
  "Polostk" = "Polotsk",
  "Munstert" = "Munster",
  "Alhomad" = "Almohads",
  "Almohad" = "Almohads",
  "Bresica" = "Brescia",
  "Bulgarians" = "Bulgaria",
  "Catile" = "Castile",
  "Castille" = "Castile",
  "Cremoa" = "Cremona",
  "Dauhphine" = "Dauphine",
  "Felte and Belluno" = "Feltre and Belluno",
  "Feltre amd Belluno" = "Feltre and Belluno",
  "Franche Compte" = "Franche Comte",
  "Kingdom of the Sword" = "Knights of the Sword",
  "Kingdon of Naples" = "Kingdom of Naples",
  "The Kingdom of Naples" = "Kingdom of Naples",
  "Pistoa" = "Pistoia",
  "Portgual" = "Portugal",
  "The Republic of Novgorod" = "Republic of Novgorod",
  "Trevisto" = "Treviso",
  "Entense" = "Estense",
  "Freising" = "Friesberg",
  "GIlan" = "Gilan",
  "Granada" = "Grenada",
  "Hapsburgs" = "Habsburgs",
  "Il Khanid Mongols" = "Il-Khanid Mongols",
  "Knights Hospitaller" = "Knights Hopitaller",
  "Luxembourg" = "Luxembourgs",
  "Mongol Empires" = "Mongols",
  "Mongol Empire" = "Mongols",
  "Vulga Bulgars" = "Volga Bulgars",
  "Wittelsbachs" = "Wittlesbachs",
  "Wurtemburg" = "Wurttemberg",
  "Hainaut" = "Hainault"
)

# Return `name` as a character vector with every entry that is a key of
# `state_name_fixes` replaced by its harmonised spelling; all other entries
# (including NA) are returned unchanged.
harmonise_state_names <- function(name) {
  name <- as.character(name)
  hit <- match(name, names(state_name_fixes))
  name[!is.na(hit)] <- unname(state_name_fixes[hit[!is.na(hit)]])
  name
}

# Turn a data frame of coordinates into SpatialPoints with CRS `crs`, dropping
# rows whose first coordinate column (longitude) is missing.
lonlat_points <- function(coords, crs) {
  SpatialPoints(coords[!is.na(coords[[1]]), ], proj4string = crs)
}

# Append to `d` a column `column` holding, per state name, the number of
# `points` that fall into a polygon of `shp` with that name (0 if none).
count_points_by_state <- function(d, points, shp, column) {
  counts <- data.frame(table(over(points, shp)$Name))
  d <- merge(d, counts, by.x = "Name", by.y = "Var1", all.x = TRUE)
  colnames(d)[ncol(d)] <- column
  d[[column]][is.na(d[[column]])] <- 0
  d
}

# Assign institutions to the states that exist in `year`.
#
# Reads the state polygons for `year` from the Abramson (2017) shapefiles below
# `path` and counts, per state, the universities, parliaments and cities that
# lie inside it. Uses the global objects `path`, `cities.bairoch`,
# `cities.bosker`, `parliaments`, `universities` and `CantoniHRE`.
#
# Arguments:
#   year  numeric scalar; selects the shapefile and the data subsets.
#
# Returns a data frame sorted by state name with the columns
#   Name, year, UniversitiesAllN, ParliamentsN, CitiesBairochAllN,
#   CitiesBosker5kN,
#   HRE_Cantoni  (count of Cantoni HRE points; NA in years without that data),
#   HRE_Nuessli  (number of features of the Nuessli HRE shapefile overlapping
#                 the state; NA in years without such a shapefile).
assignment <- function(year) {
  # one lon/lat CRS object shared by polygons and points, so that over() always
  # compares geometries with an identical CRS
  lonlat <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")

  # first year of the century `year` belongs to (1100, 1200, ...); the Bosker
  # and parliament data are matched on this value
  century <- floor(year / 100) * 100

  # load shapefile (the file naming differs before and from 1300 onwards)
  shp_dir <- paste0(path, "/Raw data/Fragmentation Abramson 2017 shapefiles/")
  if (year < 1300) {
    shp <- readOGR(paste0(shp_dir, year, "_poly.shp"))
  } else {
    shp <- readOGR(paste0(shp_dir, "poly_", year, ".shp"))
  }

  # change CRS to lon/lat
  shp <- spTransform(shp, lonlat)

  # All row filters and the sort below are applied to the whole spatial object,
  # not to `shp@data` alone: over() looks up attribute rows by polygon
  # position, so attribute table and polygons must stay aligned.

  # drop duplicate rows
  shp <- shp[!duplicated(shp@data$Name), ]

  # implement name changes (code from Abramson 2017)
  # NOTE(review): duplicates are removed before the names are harmonised, as in
  # the original code, so two polygons can still end up with the same name.
  shp@data$Name <- harmonise_state_names(shp@data$Name)

  shp <- shp[!is.na(shp@data$Name), ]
  shp@data$X <- as.numeric(as.character(shp@data$X))
  shp@data$Y <- as.numeric(as.character(shp@data$Y))
  shp@data$Area <- as.numeric(as.character(shp@data$Area))
  shp@data$logArea <- log(shp@data$Area)
  shp <- shp[!is.na(shp@data$Area) & shp@data$Area > 6, ]

  # sort states by name (polygons and attributes together)
  shp <- shp[order(shp@data$Name), ]

  # create subsets of the data & turn into spatial points
  cities_bairoch_pts <- lonlat_points(
    unique(cities.bairoch[cities.bairoch$year_min5k <= year, c("lon", "lat")]),
    lonlat
  )
  cities_bosker_pts <- lonlat_points(
    unique(cities.bosker[cities.bosker$year == century &
                           cities.bosker$citypop_le5 > 0, c("lon", "lat")]),
    lonlat
  )
  parliaments_pts <- lonlat_points(
    unique(parliaments[parliaments$year == century &
                         parliaments$parliament == 1, c("lon", "lat")]),
    lonlat
  )
  universities_pts <- lonlat_points(
    universities[universities$year <= year, c("lon", "lat")],
    lonlat
  )

  # create the state-year data frame
  d <- shp@data[, "Name", drop = FALSE]
  d$year <- year

  # assign cities and buildings to states (missing counts are coded as 0)
  d <- count_points_by_state(d, universities_pts, shp, "UniversitiesAllN")
  d <- count_points_by_state(d, parliaments_pts, shp, "ParliamentsN")
  d <- count_points_by_state(d, cities_bairoch_pts, shp, "CitiesBairochAllN")
  d <- count_points_by_state(d, cities_bosker_pts, shp, "CitiesBosker5kN")

  # count number of HRE cities (Cantoni data)
  if (year %in% c(1400, 1450, 1550, 1650, 1750)) {
    cantoni <- CantoniHRE[CantoniHRE$time_point == year,
                          c("longitude", "latitude")]
    d <- count_points_by_state(d, lonlat_points(cantoni, lonlat), shp,
                               "HRE_Cantoni")
  } else {
    d$HRE_Cantoni <- NA
  }

  # sort data set; its rows now correspond one-to-one to the polygons of `shp`
  d <- d[order(d$Name), ]
  stopifnot(identical(as.character(d$Name), as.character(shp@data$Name)))

  # part of the HRE? (number of overlapping HRE features per state)
  d$HRE_Nuessli <- NA

  if (year %in% c(1100, 1200, 1300, 1400, 1500, 1600, 1700)) {
    # load shape file
    hre <- readOGR(paste0(path, "/Raw data/Nuessli shapefiles HRE/",
                          year, "/supranational_entities.shp"))

    # adjust CRS
    hre <- spTransform(hre, lonlat)

    # overlap? one list element per polygon of `shp`, in polygon order
    hre_hits <- over(shp, hre, returnList = TRUE)
    d$HRE_Nuessli <- unname(vapply(hre_hits, nrow, integer(1)))
  }

  # output
  d
}


### implement function
# Run the assignment for 1100 and then every fifth year up to 1790. The yearly
# data frames are collected in a list and stacked once, instead of growing `d`
# with rbind() inside the loop (which copies the accumulated data each time).
# The current year is printed as a progress indicator.
state_years <- lapply(seq(1100, 1790, 5), function(yr) {
  print(yr)
  assignment(year = yr)
})
d <- do.call(rbind, state_years)


### save data
write.dta(d, paste0(path, "/State file/AssignmentToState.dta"),
          convert.factors = "string")





